## Table 1

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(kbal) #for calculating kernel balancing weights; to install, run the following
#install.packages("devtools")
#devtools::install_github("chadhazlett/KBAL")
library(estimatr) #estimation
library(ivmodel) #iv diagnostics
library(texreg) #generate latex table 

#load main dataset
pvp_data <- read.csv("PvP_data/PvP_data_main.csv")

#transform independent variable
#create binary (hectares > 100) and log + 1 cultivation variables 
pvp_data <- pvp_data %>% 
  mutate(logcult = log(maxcult + 1), cultbinary = ifelse(maxcult > 100, 1, 0))

#ols models

#subset data to municipalities with prior FARC presence
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

dim_model_cont <- lm_robust(dissident_presence ~ logcult, data = pvp_farc_subset)
dim_model_bin <- lm_robust(dissident_presence ~ cultbinary, data = pvp_farc_subset)


#iv models

#prep weather and soil variables for iv
pvp_farc_subset <- pvp_farc_subset %>% mutate(
  draindummy = as.factor(ifelse(DRAIN == "M", 1, 0)), #moderate drainage
  humiddist = case_when(humid_mean < 85 ~ (85 - humid_mean)^2, humid_mean > 85 ~ (humid_mean - 85)^2, TRUE ~ 0 ), #distance from optimal humidity point
  temprangedist = case_when(maxtemp > 27 ~ (maxtemp - 27)^2, mintemp < 14  ~ (14 - mintemp)^2, TRUE ~ 0), #distance from optimal temp, wide band
)

#instrument for binary independent variable
pvp_farc_subset$glm_inst <- predict(glm(cultbinary ~ PHAQ + temprangedist + TOTN + TOTC + sun_mean + humiddist + raintot_me + draindummy, pvp_farc_subset, family = binomial(link = "logit")), type = "response")

#instrument for continuous independent variable
pvp_farc_subset$ols_inst <- predict(lm(logcult ~ PHAQ + temprangedist + TOTN + TOTC + sun_mean + humiddist + raintot_me + draindummy, pvp_farc_subset))

#transform control variables for iv models
pvp_farc_subset <- pvp_farc_subset %>% 
  mutate(logcrops = log(maxallcrops + 1), logfloods = log(floodalertmean + 1))

#iv models using iv robust (with control variables)
iv_model_cont <- iv_robust(dissident_presence ~ logcult + logcrops + logfloods | ols_inst + logcrops + logfloods, data = pvp_farc_subset, diagnostics = TRUE) 
iv_model_bin <- iv_robust(dissident_presence ~ cultbinary + logcrops + logfloods | glm_inst + logcrops + logfloods, data = pvp_farc_subset, diagnostics = TRUE) 

#first stage f statistic diagnostics
iv_model_cont_fstat <- iv_model_cont$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)
iv_model_bin_fstat <- iv_model_bin$diagnostic_first_stage_fstatistic["value"] %>% round(digits = 2)

#anderson-rubin confidence intervals
iv_model_cont_ar <- ivmodel(Y=pvp_farc_subset$dissident_presence,D=pvp_farc_subset$logcult,Z=pvp_farc_subset$ols_inst, X = pvp_farc_subset[,c("logcrops", "logfloods")])
iv_model_bin_ar <- ivmodel(Y=pvp_farc_subset$dissident_presence,D=pvp_farc_subset$cultbinary,Z=pvp_farc_subset$glm_inst, X = pvp_farc_subset[,c("logcrops", "logfloods")])

iv_model_cont_arci <- AR.test(iv_model_cont_ar)$ci %>% round(digits = 2) %>% data.frame() %>% mutate(ci = paste(lower, upper, sep = ", "))
iv_model_bin_arci <- AR.test(iv_model_bin_ar)$ci %>% round(digits = 2) %>% data.frame() %>% mutate(ci = paste(lower, upper, sep = ", "))

#balancing model

#transform selected covariates
#create variable for highways per square km, percent of population in rural zones, log population
pvp_data <- pvp_data %>% 
  mutate(hwycover = (hwylengthkm/sqkm), ruralpct = (ruralpop14/pop14)*100, logpop = log(pop14))

#list of covariates for kernel balancing
covariate_list <- c("lit_ratio", "electric_ratio", "logpop", "ruralpct", "elev_stdev", "hwycover", "distCali", "distBogota", "distMedellin")

#subset data to FARC municipalities for main anaylses
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

#note that there are two cases missing covariates
pvp_farc_subset %>% 
  dplyr::select(municode, all_of(covariate_list), dissident_presence, cultbinary) %>% 
  filter(!complete.cases(.)) %>% 
  count()

#drop the two incomplete cases
pvp_farc_subset <- pvp_farc_subset %>% 
  filter(complete.cases(pvp_farc_subset[ , c(covariate_list, 'dissident_presence', 'cultbinary')]))

#calculate weights using kernel balancing
pvp_farc_subset$kbalweights <- kbal(allx = pvp_farc_subset[,covariate_list],
                                         treatment = pvp_farc_subset$cultbinary,
                                         fullSVD = TRUE, meanfirst = F)$w


kbal_model_bin <- lm_robust(dissident_presence ~ cultbinary, weights = kbalweights, data = pvp_farc_subset)

# print results as latex table
texreg(list(dim_model_bin, dim_model_cont, kbal_model_bin, iv_model_bin, iv_model_cont),
       digits = 2, include.ci = FALSE, single.row = FALSE, include.fstatistic = FALSE, 
       include.rmse = FALSE, include.rsquared = FALSE, include.adjrs = FALSE, 
       include.nobs = TRUE, stars = c(0.001, 0.01, 0.05), caption.above	= TRUE,
       custom.coef.names	= c(NA, "Coca Cultivation Binary", "Coca Cultivation Continuous", "Agriculture Control", "Flood Control"),
       custom.gof.rows = list("First Stage F-stat" = c("", "", "", iv_model_bin_fstat, iv_model_cont_fstat), "Anderson-Rubin CI" = c("", "", "", iv_model_bin_arci$ci, iv_model_cont_arci$ci)),
       caption = "Effects of coca cultivation on FARC splinter group emergence",
       custom.header = list("Difference-in-Means" = 1:2, "Covariate-Adjusted" = 3, "Instrumental Variables" = 4:5)
)

